---
title: "Acculturation Orientations and Latino Political Behavior"
subtitle: "Political Profiling of GMM k=4 Cluster Solution"
author: "Jessala A. Grijalva"
date: "`r format(Sys.Date(), '%B %d, %Y')`"
format:
  pdf:
    toc: true
    toc-depth: 3
    number-sections: true
    geometry: margin=1in
    colorlinks: true
    keep-tex: false
execute:
  warning: false
  message: false
  echo: false
---

```{r setup}
#| label: setup

library(here)
library(pacman)
p_load(tidyverse, knitr, kableExtra, rstatix, car)

# Global parameters
set.seed(2500)
SEED <- 2500
n_boot <- 1000

# ── Publishing Standards ──
# Grayscale palette (locked across all manuscripts)
ORIENTATION_FILLS <- c(
  "Culture Affirming" = "gray15",
  "Assimilationist"   = "gray40",
  "Demicultural"      = "gray65",
  "Bicultural"        = "gray85"
)
ORIENTATION_SHAPES <- c(
  "Culture Affirming" = 16,  # filled circle
  "Assimilationist"   = 17,  # filled triangle
  "Demicultural"      = 15,  # filled square
  "Bicultural"        = 18   # filled diamond
)
ORIENTATION_LINETYPES <- c(
  "Culture Affirming" = "solid",
  "Assimilationist"   = "dashed",
  "Demicultural"      = "dotted",
  "Bicultural"        = "dotdash"
)

# Human-readable variable labels
VAR_LABELS <- c(
  "IDEOLOGY"  = "Ideology",
  "PARTYID"   = "Party Identification",
  "IMMVIEW"   = "Immigration View",
  "DREAMACT"  = "DREAM Act Support",
  "IMMPOLICY" = "Immigration Policy"
)

# Political DVs (not used in clustering -- these are outcome measures)
dvs <- c("IDEOLOGY", "PARTYID", "IMMVIEW", "DREAMACT", "IMMPOLICY")

# Helper: apply VAR_LABELS to a variable name vector
label_vars <- function(x) unname(VAR_LABELS[x])
```

\newpage

# Overview

**Purpose:** Analyze political profiles across the four acculturation
orientations identified in Phase 2 (Culture Affirming, Assimilationist,
Bicultural, Demicultural). This document supports a standalone manuscript
examining how bidirectional acculturation orientations predict political
attitudes and behavior among Latinos.

**Input:** `data/processed/bam_clustered_gmm_k4.rda` (produced by Phase 2
of the BAM pipeline in the `acculturation-bam-scale` repository).

**Analysis proceeds in four phases:**

- Phase 1: Within-orientation political profiles
- Phase 2: Between-orientation comparisons (Kruskal-Wallis, Cliff's delta, Cram\'{e}r's V)
- Phase 3: Theoretical dimension contrasts (Heritage vs. American)
- Phase 4: Hypothesis evaluation

---

# Load and Prepare Data

```{r load-data}
#| label: load-data

# Load the clustered dataset produced by Phase 2
load(here("data", "processed", "bam_clustered_gmm_k4.rda"))

# Phase 2 saves 'bam_clustered' with:
#   cluster_gmm_k4  (integer 1-4)
#   orientation      (labeled factor)
#
# Rename to 'df' for cleaner code and use orientation directly.
df <- bam_clustered %>%
  filter(!is.na(orientation)) %>%
  rename(cluster = orientation)

# ── Verify label mapping ──
# After Phase 2 renders, check the means table to confirm this mapping:
#   1 = Culture Affirming   (high heritage, low American aspiration)
#   2 = Assimilationist     (high American, low heritage aspiration)
#   3 = Demicultural        (moderate both, low Spanish retention)
#   4 = Bicultural          (high both, largest group ~68%)
#
# If the numbering shifted, update the case_when in Phase 2 and re-render.

# Sample sizes
n_total <- nrow(df)
cluster_ns <- df %>% count(cluster) %>% mutate(pct = n / sum(n) * 100)

cat("Total observations:", n_total, "\n")
cat("Orientation distribution:\n")
print(cluster_ns)
```

## Verify Data

```{r verify-data}
#| label: verify-data

# Verify all DVs exist
missing_dvs <- setdiff(dvs, names(df))
if (length(missing_dvs) > 0) {
  warning("Missing DVs: ", paste(missing_dvs, collapse = ", "))
  dvs <- intersect(dvs, names(df))
}

# Variable coding reference:
# IDEOLOGY:  1=Conservative, 2=Moderate, 3=Liberal
# PARTYID:   1=Republican, 2=Independent, 3=Democrat
# IMMVIEW:   1=Burden, 2=Strengthen
# DREAMACT:  1-4 scale (higher = more support)
# IMMPOLICY: 1-5 scale (higher = more permissive)

cat("=== Variable Ranges ===\n")
sapply(df[dvs], function(x) range(as.numeric(x), na.rm = TRUE))
```

\newpage

# Phase 1: Within-Orientation Political Profiles

**Purpose:** Characterize the political composition of each orientation.

## Sample Distribution

```{r tbl-sample-distribution}
#| label: tbl-sample-distribution
#| tbl-cap: "Sample Distribution by Acculturation Orientation"

cluster_ns %>%
  mutate(pct = round(pct, 1)) %>%
  rename(Orientation = cluster, N = n, `%` = pct) %>%
  kable(booktabs = TRUE, align = c("l", "r", "r")) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} $N$ = `r n_total`. Orientations derived from GMM $k$ = 4 solution (Phase 2).

## Ideology

```{r ideology-distribution}
#| label: ideology-distribution

ideology_dist <- df %>%
  filter(!is.na(IDEOLOGY)) %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    pct_conservative = mean(IDEOLOGY == 1) * 100,
    pct_moderate = mean(IDEOLOGY == 2) * 100,
    pct_liberal = mean(IDEOLOGY == 3) * 100,
    mean = mean(IDEOLOGY),
    sd = sd(IDEOLOGY),
    .groups = "drop"
  )

ideology_dist %>%
  mutate(across(starts_with("pct"), ~ round(.x, 1)),
         across(c(mean, sd), ~ round(.x, 2))) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "% Cons.", "% Mod.", "% Lib.", "M", "SD"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Ideology coded 1 = Conservative, 2 = Moderate, 3 = Liberal.

## Party Identification

```{r partyid-distribution}
#| label: partyid-distribution

partyid_dist <- df %>%
  filter(!is.na(PARTYID)) %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    pct_republican = mean(PARTYID == 1) * 100,
    pct_independent = mean(PARTYID == 2) * 100,
    pct_democrat = mean(PARTYID == 3) * 100,
    mean = mean(PARTYID),
    sd = sd(PARTYID),
    .groups = "drop"
  )

partyid_dist %>%
  mutate(across(starts_with("pct"), ~ round(.x, 1)),
         across(c(mean, sd), ~ round(.x, 2))) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "% Rep.", "% Ind.", "% Dem.", "M", "SD"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Party ID coded 1 = Republican, 2 = Independent, 3 = Democrat.

## Immigration Views

```{r immview-distribution}
#| label: immview-distribution

immview_dist <- df %>%
  filter(!is.na(IMMVIEW)) %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    pct_burden = mean(IMMVIEW == 1) * 100,
    pct_strengthen = mean(IMMVIEW == 2) * 100,
    mean = mean(IMMVIEW),
    sd = sd(IMMVIEW),
    .groups = "drop"
  )

immview_dist %>%
  mutate(across(starts_with("pct"), ~ round(.x, 1)),
         across(c(mean, sd), ~ round(.x, 2))) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "% Burden", "% Strengthen", "M", "SD"),
        align = c("l", rep("r", 5))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Immigration view coded 1 = Burden, 2 = Strengthen country.

## DREAM Act Support

```{r dreamact-distribution}
#| label: dreamact-distribution

dreamact_dist <- df %>%
  filter(!is.na(DREAMACT)) %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    mean = mean(DREAMACT),
    sd = sd(DREAMACT),
    pct_support = mean(DREAMACT >= 3) * 100,
    .groups = "drop"
  )

dreamact_dist %>%
  mutate(across(c(mean, sd), ~ round(.x, 2)),
         pct_support = round(pct_support, 1)) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "M", "SD", "% Support ($\\geq$3)"),
        escape = FALSE,
        align = c("l", rep("r", 4))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} DREAM Act support on 1--4 scale (higher = more support).

## Immigration Policy

```{r immpolicy-distribution}
#| label: immpolicy-distribution

immpolicy_dist <- df %>%
  filter(!is.na(IMMPOLICY)) %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    mean = mean(IMMPOLICY),
    sd = sd(IMMPOLICY),
    pct_permissive = mean(IMMPOLICY >= 4) * 100,
    .groups = "drop"
  )

immpolicy_dist %>%
  mutate(across(c(mean, sd), ~ round(.x, 2)),
         pct_permissive = round(pct_permissive, 1)) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "M", "SD", "% Permissive ($\\geq$4)"),
        escape = FALSE,
        align = c("l", rep("r", 4))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Immigration policy on 1--5 scale (higher = more permissive).

## Summary Profile Table

```{r tbl-summary-profiles}
#| label: tbl-summary-profiles
#| tbl-cap: "Summary Political Profiles by Acculturation Orientation"

summary_profiles <- df %>%
  group_by(cluster) %>%
  summarise(
    n = n(),
    pct_moderate = mean(IDEOLOGY == 2, na.rm = TRUE) * 100,
    pct_democrat = mean(PARTYID == 3, na.rm = TRUE) * 100,
    pct_pro_immigrant = mean(IMMVIEW == 2, na.rm = TRUE) * 100,
    pct_dream_support = mean(DREAMACT >= 3, na.rm = TRUE) * 100,
    pct_permissive = mean(IMMPOLICY >= 4, na.rm = TRUE) * 100,
    .groups = "drop"
  )

summary_profiles %>%
  mutate(across(starts_with("pct"), ~ round(.x, 1))) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", "n", "% Moderate", "% Democrat",
                       "% Pro-Imm.", "% DREAM", "% Permissive"),
        align = c("l", rep("r", 6))) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE)
```

\noindent \textit{Note:} $N$ = `r n_total`. Pro-Imm. = views immigrants as strengthening the country. DREAM = supports DREAM Act ($\geq$ 3 on 1--4 scale). Permissive = favors permissive immigration policy ($\geq$ 4 on 1--5 scale).

## Profile Figure

```{r fig-profiles}
#| label: fig-profiles
#| fig-cap: "Political Profiles by Acculturation Orientation"
#| fig-width: 10
#| fig-height: 6

profile_long <- summary_profiles %>%
  pivot_longer(
    cols = starts_with("pct"),
    names_to = "measure",
    values_to = "percentage"
  ) %>%
  mutate(
    measure = case_when(
      measure == "pct_moderate" ~ "% Moderate",
      measure == "pct_democrat" ~ "% Democrat",
      measure == "pct_pro_immigrant" ~ "% Pro-Immigrant",
      measure == "pct_dream_support" ~ "% DREAM Support",
      measure == "pct_permissive" ~ "% Permissive"
    ),
    measure = factor(measure, levels = c("% Moderate", "% Democrat",
                                          "% Pro-Immigrant", "% DREAM Support",
                                          "% Permissive"))
  )

p_profiles <- ggplot(profile_long, aes(x = measure, y = percentage, fill = cluster)) +
  geom_col(position = position_dodge(width = 0.8), width = 0.7, color = "black",
           linewidth = 0.3) +
  geom_text(aes(label = round(percentage, 0)),
            position = position_dodge(width = 0.8), vjust = -0.5, size = 3) +
  scale_fill_manual(values = ORIENTATION_FILLS) +
  scale_y_continuous(limits = c(0, 100), expand = c(0, 0)) +
  labs(x = NULL, y = "Percentage", fill = "Orientation") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom")

p_profiles

ggsave(here("figures", "fig-profiles.png"), p_profiles,
       width = 10, height = 6, dpi = 600)
```

\newpage

# Phase 2: Between-Orientation Comparisons

**Purpose:** Quantify differences between orientations and assess differentiation.

## Omnibus Tests (Kruskal-Wallis)

```{r tbl-kruskal-wallis}
#| label: tbl-kruskal-wallis
#| tbl-cap: "Omnibus Test Results (Kruskal-Wallis)"

kw_results <- map_dfr(dvs, function(dv) {
  test <- kruskal.test(reformulate("cluster", response = dv), data = df)
  tibble(
    Variable = dv,
    Chi_sq = test$statistic,
    df = test$parameter,
    p_value = test$p.value
  )
})

kw_results %>%
  mutate(
    Variable = label_vars(Variable),
    Chi_sq = round(Chi_sq, 2),
    p_value = ifelse(p_value < 0.001, "< .001", sprintf("%.3f", p_value))
  ) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "$\\chi^2$", "df", "$p$"),
        escape = FALSE,
        align = c("l", "r", "r", "r")) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Kruskal-Wallis rank-sum tests with $k$ = 4 acculturation orientations. df = 3 for all tests.

## Pairwise Effect Sizes (Cliff's Delta)

```{r cliffs-delta-functions}
#| label: cliffs-delta-functions

# Cliff's Delta: nonparametric effect size [-1, 1]
cliffs_delta <- function(x, y) {
  x <- na.omit(x)
  y <- na.omit(y)
  dominance <- outer(x, y, FUN = function(a, b) sign(a - b))
  sum(dominance) / (length(x) * length(y))
}

# Bootstrap CI for Cliff's Delta
bootstrap_delta_ci <- function(x, y, n_boot = 1000, ci = 0.95) {
  x <- na.omit(x)
  y <- na.omit(y)
  deltas <- replicate(n_boot, {
    x_boot <- sample(x, length(x), replace = TRUE)
    y_boot <- sample(y, length(y), replace = TRUE)
    cliffs_delta(x_boot, y_boot)
  })
  alpha <- (1 - ci) / 2
  quantile(deltas, c(alpha, 1 - alpha))
}
```

```{r pairwise-comparisons}
#| label: pairwise-comparisons

clusters_vec <- levels(df$cluster)
pairs <- combn(clusters_vec, 2, simplify = FALSE)

# 6 pairs x 5 DVs = 30 comparisons
pairwise_results <- map_dfr(dvs, function(dv) {
  map_dfr(pairs, function(pair) {
    g1 <- df %>% filter(cluster == pair[1]) %>% pull(!!sym(dv))
    g2 <- df %>% filter(cluster == pair[2]) %>% pull(!!sym(dv))

    delta <- cliffs_delta(g1, g2)
    ci <- bootstrap_delta_ci(g1, g2, n_boot = n_boot)

    tibble(
      Variable = dv,
      Group1 = pair[1],
      Group2 = pair[2],
      Delta = delta,
      CI_low = ci[1],
      CI_high = ci[2],
      Significant = ifelse(ci[1] > 0 | ci[2] < 0, "Yes", "No"),
      Magnitude = case_when(
        abs(delta) >= 0.43 ~ "Large",
        abs(delta) >= 0.28 ~ "Medium",
        abs(delta) >= 0.11 ~ "Small",
        TRUE ~ "Negligible"
      )
    )
  })
})
```

```{r tbl-pairwise-all}
#| label: tbl-pairwise-all
#| tbl-cap: "Pairwise Effect Sizes (All 30 Comparisons)"

pairwise_results %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Delta, CI_low, CI_high), ~ round(.x, 2)),
    Comparison = paste0(substr(Group1, 1, 3), " vs ", substr(Group2, 1, 3)),
    CI = paste0("[", CI_low, ", ", CI_high, "]")
  ) %>%
  select(Variable, Comparison, Delta, CI, Significant, Magnitude) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "Comparison", "$\\delta$", "95\\% CI",
                       "Sig.", "Magnitude"),
        escape = FALSE,
        align = c("l", "l", "r", "c", "c", "l")) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE, font_size = 9)
```

\noindent \textit{Note:} Cliff's $\delta$ with 95\% bootstrap CIs ($B$ = `r format(n_boot, big.mark = ",")`, seed = `r SEED`). Significance determined by CI excluding zero. Magnitude thresholds: $|\delta| \geq$ 0.11 (small), $\geq$ 0.28 (medium), $\geq$ 0.43 (large).

## FDR-Corrected Pairwise Comparisons

```{r fdr-correction}
#| label: fdr-correction

pairwise_wilcox_results <- map_dfr(dvs, function(dv) {
  pw_test <- pairwise.wilcox.test(
    df[[dv]], df$cluster,
    p.adjust.method = "none", exact = FALSE
  )
  p_matrix <- pw_test$p.value
  comparisons <- which(!is.na(p_matrix), arr.ind = TRUE)

  map_dfr(seq_len(nrow(comparisons)), function(i) {
    row_idx <- comparisons[i, 1]
    col_idx <- comparisons[i, 2]
    tibble(
      Variable = dv,
      Group1 = colnames(p_matrix)[col_idx],
      Group2 = rownames(p_matrix)[row_idx],
      p_raw = p_matrix[row_idx, col_idx]
    )
  })
})

pairwise_wilcox_results <- pairwise_wilcox_results %>%
  mutate(
    p_fdr = p.adjust(p_raw, method = "BH"),
    Sig_raw = ifelse(p_raw < .05, "Yes", "No"),
    Sig_fdr = ifelse(p_fdr < .05, "Yes", "No")
  )

cat("=== FDR Correction Summary ===\n")
cat("Significant at p < .05 (uncorrected):",
    sum(pairwise_wilcox_results$Sig_raw == "Yes"), "of 30\n")
cat("Significant at p < .05 (FDR-corrected):",
    sum(pairwise_wilcox_results$Sig_fdr == "Yes"), "of 30\n")

# Merge with Cliff's delta results
pairwise_results_full <- pairwise_results %>%
  left_join(
    pairwise_wilcox_results %>%
      select(Variable, Group1, Group2, p_raw, p_fdr, Sig_fdr),
    by = c("Variable", "Group1", "Group2")
  )

pairwise_results_full %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Delta, CI_low, CI_high), ~ round(.x, 2)),
    p_raw = ifelse(p_raw < 0.001, "< .001", sprintf("%.3f", p_raw)),
    p_fdr = ifelse(as.numeric(p_fdr) < 0.001, "< .001", sprintf("%.3f", p_fdr)),
    Comparison = paste0(substr(Group1, 1, 3), " vs ", substr(Group2, 1, 3)),
    CI = paste0("[", CI_low, ", ", CI_high, "]")
  ) %>%
  select(Variable, Comparison, Delta, CI, p_fdr, Sig_fdr, Magnitude) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "Comparison", "$\\delta$", "95\\% CI",
                       "$p_{\\text{FDR}}$", "Sig.", "Magnitude"),
        escape = FALSE,
        align = c("l", "l", "r", "c", "r", "c", "l")) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE, font_size = 8)
```

\noindent \textit{Note:} Wilcoxon rank-sum tests with Benjamini-Hochberg FDR correction across 30 comparisons.

## Top Effect Sizes

```{r tbl-top-effects}
#| label: tbl-top-effects
#| tbl-cap: "Top 10 Effect Sizes (Ranked by $|\\delta|$)"

top_effects <- pairwise_results %>%
  mutate(abs_delta = abs(Delta)) %>%
  arrange(desc(abs_delta)) %>%
  head(10) %>%
  select(-abs_delta)

top_effects %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Delta, CI_low, CI_high), ~ round(.x, 2)),
    Comparison = paste0(substr(Group1, 1, 3), " vs ", substr(Group2, 1, 3)),
    CI = paste0("[", CI_low, ", ", CI_high, "]")
  ) %>%
  select(Variable, Comparison, Delta, CI, Significant, Magnitude) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "Comparison", "$\\delta$", "95\\% CI",
                       "Sig.", "Magnitude"),
        escape = FALSE,
        align = c("l", "l", "r", "c", "c", "l")) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE)
```

\noindent \textit{Note:} Cliff's $\delta$ with 95\% bootstrap CIs. Ranked by absolute effect size.

## Pairwise Effects Figure

```{r fig-pairwise-effects}
#| label: fig-pairwise-effects
#| fig-cap: "Pairwise Effect Sizes by Political Outcome"
#| fig-width: 12
#| fig-height: 8

pairwise_plot <- pairwise_results %>%
  mutate(
    Variable = label_vars(Variable),
    Comparison = paste0(substr(Group1, 1, 3), " vs ", substr(Group2, 1, 3))
  )

p_pairwise <- ggplot(pairwise_plot, aes(x = Delta, y = Comparison,
                                         color = Significant)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray50") +
  geom_errorbarh(aes(xmin = CI_low, xmax = CI_high), height = 0.2) +
  geom_point(size = 2) +
  scale_color_manual(values = c("Yes" = "black", "No" = "gray60")) +
  facet_wrap(~ Variable, ncol = 5) +
  labs(x = expression("Cliff's " * delta), y = NULL, color = "Significant") +
  theme_minimal() +
  theme(legend.position = "bottom",
        strip.text = element_text(face = "bold"))

p_pairwise

ggsave(here("figures", "fig-pairwise-effects.png"), p_pairwise,
       width = 12, height = 8, dpi = 600)
```

## Association Strength (Cram\'{e}r's V)

```{r cramers-v}
#| label: cramers-v

cramers_v <- function(x, y) {
  tbl <- table(x, y)
  chi_sq <- suppressWarnings(chisq.test(tbl, correct = FALSE)$statistic)
  n <- sum(tbl)
  min_dim <- min(nrow(tbl) - 1, ncol(tbl) - 1)
  as.numeric(sqrt(chi_sq / (n * min_dim)))
}

bootstrap_v_ci <- function(x, y, n_boot = 1000, ci = 0.95) {
  complete <- complete.cases(x, y)
  x <- x[complete]; y <- y[complete]
  vs <- replicate(n_boot, {
    idx <- sample(length(x), replace = TRUE)
    cramers_v(x[idx], y[idx])
  })
  alpha <- (1 - ci) / 2
  quantile(vs, c(alpha, 1 - alpha), na.rm = TRUE)
}
```

```{r tbl-cramers-v}
#| label: tbl-cramers-v
#| tbl-cap: "Association Strength (Cram\\'{e}r's V)"

cramers_results <- map_dfr(dvs, function(dv) {
  v <- cramers_v(df$cluster, df[[dv]])
  ci <- bootstrap_v_ci(df$cluster, df[[dv]], n_boot = n_boot)
  tibble(Variable = dv, Cramers_V = v, CI_low = ci[1], CI_high = ci[2])
})

cramers_results %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Cramers_V, CI_low, CI_high), ~ round(.x, 2))
  ) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "Cram\\'{e}r's V", "95\\% CI Low", "95\\% CI High"),
        escape = FALSE,
        align = c("l", rep("r", 3))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Cram\'{e}r's V with 95\% bootstrap CIs ($B$ = `r format(n_boot, big.mark = ",")`, seed = `r SEED`).

```{r fig-cramers-v}
#| label: fig-cramers-v
#| fig-cap: "Association Strength by Political Outcome"
#| fig-width: 8
#| fig-height: 5

cramers_plot <- cramers_results %>%
  mutate(Variable = label_vars(Variable))

p_cramers <- ggplot(cramers_plot, aes(x = reorder(Variable, Cramers_V),
                                       y = Cramers_V)) +
  geom_col(fill = "gray40", width = 0.6) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high), width = 0.2) +
  geom_hline(yintercept = c(0.1, 0.3), linetype = "dashed",
             color = "gray60", alpha = 0.7) +
  coord_flip() +
  labs(x = NULL, y = expression("Cram\u00E9r's V")) +
  theme_minimal()

p_cramers

ggsave(here("figures", "fig-cramers-v.png"), p_cramers,
       width = 8, height = 5, dpi = 600)
```

## Domain-Level Summary

```{r tbl-domain-summary}
#| label: tbl-domain-summary
#| tbl-cap: "Domain Distinctiveness"

domain_summary <- pairwise_results %>%
  group_by(Variable) %>%
  summarise(
    Avg_abs_delta = mean(abs(Delta)),
    N_significant = sum(Significant == "Yes"),
    N_large = sum(Magnitude == "Large"),
    N_medium = sum(Magnitude == "Medium"),
    .groups = "drop"
  ) %>%
  arrange(desc(Avg_abs_delta))

domain_summary %>%
  mutate(
    Variable = label_vars(Variable),
    Avg_abs_delta = round(Avg_abs_delta, 2)
  ) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "Mean $|\\delta|$", "\\# Sig.", "\\# Large", "\\# Medium"),
        escape = FALSE,
        align = c("l", rep("r", 4))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Sig. = number of pairwise comparisons (out of 6) with 95\% CI excluding zero. Magnitude thresholds follow Romano et al. (2006).

## Within-Orientation Heterogeneity

```{r tbl-cv-calculation}
#| label: tbl-cv-calculation
#| tbl-cap: "Within-Orientation Heterogeneity (Coefficient of Variation)"

cv_results <- map_dfr(dvs, function(dv) {
  df %>%
    filter(!is.na(!!sym(dv))) %>%
    group_by(cluster) %>%
    summarise(
      Variable = dv,
      Mean = mean(!!sym(dv)),
      SD = sd(!!sym(dv)),
      CV = SD / Mean,
      .groups = "drop"
    )
})

cv_wide <- cv_results %>%
  select(cluster, Variable, CV) %>%
  pivot_wider(names_from = Variable, values_from = CV)

cv_wide %>%
  mutate(across(-cluster, ~ round(.x, 2))) %>%
  kable(booktabs = TRUE,
        col.names = c("Orientation", label_vars(dvs)),
        align = c("l", rep("r", 5))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} CV = coefficient of variation (SD/M). Higher values indicate greater within-orientation heterogeneity.

```{r cv-summary}
#| label: cv-summary

cv_summary <- cv_results %>%
  group_by(cluster) %>%
  summarise(Avg_CV = mean(CV), .groups = "drop") %>%
  arrange(desc(Avg_CV))

cat("=== Average CV by Orientation ===\n")
print(cv_summary %>% mutate(Avg_CV = round(Avg_CV, 2)))
cat("\nMost heterogeneous:", as.character(cv_summary$cluster[1]), "\n")
cat("Least heterogeneous:", as.character(cv_summary$cluster[nrow(cv_summary)]), "\n")
```

## Levene's Test

```{r tbl-levenes-test}
#| label: tbl-levenes-test
#| tbl-cap: "Levene's Test for Equality of Variances"

levene_results <- map_dfr(dvs, function(dv) {
  formula <- as.formula(paste(dv, "~ cluster"))
  test <- leveneTest(formula, data = df)
  tibble(
    Variable = dv,
    F_statistic = test$`F value`[1],
    df1 = test$Df[1],
    df2 = test$Df[2],
    p_value = test$`Pr(>F)`[1]
  )
})

levene_results %>%
  mutate(
    Variable = label_vars(Variable),
    F_statistic = round(F_statistic, 2),
    p_value = ifelse(p_value < 0.001, "< .001", sprintf("%.3f", p_value))
  ) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "$F$", "df$_1$", "df$_2$", "$p$"),
        escape = FALSE,
        align = c("l", rep("r", 4))) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Levene's test (Brown-Forsythe, median-based) for homogeneity of variances across orientations.

```{r variance-by-group}
#| label: variance-by-group

variance_by_group <- map_dfr(dvs, function(dv) {
  df %>%
    filter(!is.na(!!sym(dv))) %>%
    group_by(cluster) %>%
    summarise(Variable = dv, Variance = var(!!sym(dv)), .groups = "drop")
})

highest_var <- variance_by_group %>%
  group_by(Variable) %>%
  slice_max(Variance, n = 1) %>%
  select(Variable, Highest_Var_Cluster = cluster, Variance)

cat("=== Highest Variance Orientation by Variable ===\n")
print(highest_var %>% mutate(Variance = round(Variance, 2)))

demicultural_highest <- sum(highest_var$Highest_Var_Cluster == "Demicultural")
cat("\nDVs where Demicultural has highest variance:", demicultural_highest, "of 5\n")
```

\newpage

# Phase 3: Theoretical Contrasts

**Purpose:** Adjudicate between heritage and American dimensions as drivers
of political heterogeneity.

## Dimension Groupings

```{r dimension-groupings}
#| label: dimension-groupings

# High Heritage = Culture Affirming + Bicultural
# Low Heritage  = Assimilationist + Demicultural
# High American = Assimilationist + Bicultural
# Low American  = Culture Affirming + Demicultural

df <- df %>%
  mutate(
    high_heritage = cluster %in% c("Culture Affirming", "Bicultural"),
    high_american = cluster %in% c("Assimilationist", "Bicultural")
  )

cat("=== Heritage Dimension ===\n")
table(df$cluster, df$high_heritage)
cat("\n=== American Dimension ===\n")
table(df$cluster, df$high_american)
```

## Heritage Dimension Effects

```{r heritage-effects}
#| label: heritage-effects

heritage_results <- map_dfr(dvs, function(dv) {
  high <- df %>% filter(high_heritage) %>% pull(!!sym(dv))
  low  <- df %>% filter(!high_heritage) %>% pull(!!sym(dv))
  delta <- cliffs_delta(high, low)
  ci <- bootstrap_delta_ci(high, low, n_boot = n_boot)
  tibble(Variable = dv, Dimension = "Heritage", Delta = delta,
         CI_low = ci[1], CI_high = ci[2],
         Significant = ifelse(ci[1] > 0 | ci[2] < 0, "Yes", "No"))
})
```

## American Dimension Effects

```{r american-effects}
#| label: american-effects

american_results <- map_dfr(dvs, function(dv) {
  high <- df %>% filter(high_american) %>% pull(!!sym(dv))
  low  <- df %>% filter(!high_american) %>% pull(!!sym(dv))
  delta <- cliffs_delta(high, low)
  ci <- bootstrap_delta_ci(high, low, n_boot = n_boot)
  tibble(Variable = dv, Dimension = "American", Delta = delta,
         CI_low = ci[1], CI_high = ci[2],
         Significant = ifelse(ci[1] > 0 | ci[2] < 0, "Yes", "No"))
})
```

## Dimension Comparison

```{r tbl-dimension-contrasts}
#| label: tbl-dimension-contrasts
#| tbl-cap: "Dimension Contrasts (Heritage vs American)"

dimension_results <- bind_rows(heritage_results, american_results)

dimension_results %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Delta, CI_low, CI_high), ~ round(.x, 2)),
    CI = paste0("[", CI_low, ", ", CI_high, "]")
  ) %>%
  select(Dimension, Variable, Delta, CI, Significant) %>%
  arrange(Dimension, Variable) %>%
  kable(booktabs = TRUE,
        col.names = c("Dimension", "Variable", "$\\delta$", "95\\% CI",
                       "Significant"),
        escape = FALSE,
        align = c("l", "l", "r", "c", "c")) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE)
```

\noindent \textit{Note:} Cliff's $\delta$ comparing high vs. low on each acculturation dimension. Heritage: Culture Affirming + Bicultural vs. Assimilationist + Demicultural. American: Assimilationist + Bicultural vs. Culture Affirming + Demicultural.

```{r dimension-summary}
#| label: dimension-summary

dimension_summary <- dimension_results %>%
  group_by(Dimension) %>%
  summarise(
    Avg_abs_delta = mean(abs(Delta)),
    N_significant = sum(Significant == "Yes"),
    .groups = "drop"
  )

cat("=== Dimension Summary ===\n")
print(dimension_summary %>% mutate(Avg_abs_delta = round(Avg_abs_delta, 2)))

heritage_avg <- dimension_summary %>%
  filter(Dimension == "Heritage") %>% pull(Avg_abs_delta)
american_avg <- dimension_summary %>%
  filter(Dimension == "American") %>% pull(Avg_abs_delta)
ratio <- heritage_avg / american_avg

cat("\nHeritage/American ratio:", round(ratio, 2), "\n")
if (ratio > 1) {
  cat("Heritage dimension shows STRONGER effects than American dimension\n")
} else {
  cat("American dimension shows STRONGER effects than Heritage dimension\n")
}
```

```{r fig-dimension-effects}
#| label: fig-dimension-effects
#| fig-cap: "Dimension Effects Comparison"
#| fig-width: 9
#| fig-height: 5

dimension_plot <- dimension_results %>%
  mutate(Variable = label_vars(Variable))

p_dimensions <- ggplot(dimension_plot, aes(x = Variable, y = Delta,
                                            fill = Dimension)) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
  geom_col(position = position_dodge(width = 0.8), width = 0.7, color = "black",
           linewidth = 0.3) +
  geom_errorbar(aes(ymin = CI_low, ymax = CI_high),
                position = position_dodge(width = 0.8), width = 0.2) +
  scale_fill_manual(values = c("Heritage" = "gray30", "American" = "gray70")) +
  labs(x = NULL, y = expression("Cliff's " * delta * " (High vs Low)")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        legend.position = "bottom")

p_dimensions

ggsave(here("figures", "fig-dimension-effects.png"), p_dimensions,
       width = 9, height = 5, dpi = 600)
```

## Key Contrast: Assimilationist vs Bicultural

```{r key-contrast}
#| label: key-contrast

cat("=== Key Contrast: Assimilationist vs Bicultural ===\n")
cat("(Both High American; differ on Heritage)\n")
cat("Interpretation: Heritage Maintenance Effect\n\n")

key_contrast <- pairwise_results %>%
  filter(
    (Group1 == "Assimilationist" & Group2 == "Bicultural") |
    (Group1 == "Bicultural" & Group2 == "Assimilationist")
  )

key_contrast %>%
  mutate(
    Variable = label_vars(Variable),
    across(c(Delta, CI_low, CI_high), ~ round(.x, 2)),
    CI = paste0("[", CI_low, ", ", CI_high, "]")
  ) %>%
  select(Variable, Delta, CI, Significant, Magnitude) %>%
  kable(booktabs = TRUE,
        col.names = c("Variable", "$\\delta$", "95\\% CI",
                       "Significant", "Magnitude"),
        escape = FALSE,
        align = c("l", "r", "c", "c", "l")) %>%
  kable_styling(latex_options = c("hold_position"), full_width = FALSE)
```

\noindent \textit{Note:} Assimilationist and Bicultural both score high on the American dimension; differences isolate the effect of heritage cultural maintenance.

## Quadrant Figure

```{r fig-quadrant}
#| label: fig-quadrant
#| fig-cap: "Acculturation Orientations in Two-Dimensional Space"
#| fig-width: 7
#| fig-height: 6

quadrant_data <- tibble(
  cluster = factor(
    c("Culture Affirming", "Assimilationist", "Bicultural", "Demicultural"),
    levels = levels(df$cluster)
  ),
  Heritage = c(1, 0, 1, 0),
  American = c(0, 1, 1, 0)
) %>%
  mutate(
    x = ifelse(American == 1, 0.75, 0.25),
    y = ifelse(Heritage == 1, 0.75, 0.25)
  ) %>%
  left_join(cluster_ns %>% select(cluster, n), by = "cluster")

p_quadrant <- ggplot(quadrant_data, aes(x = x, y = y)) +
  geom_rect(aes(xmin = 0, xmax = 0.5, ymin = 0.5, ymax = 1),
            fill = "gray90", alpha = 0.5) +
  geom_rect(aes(xmin = 0.5, xmax = 1, ymin = 0.5, ymax = 1),
            fill = "gray80", alpha = 0.5) +
  geom_rect(aes(xmin = 0, xmax = 0.5, ymin = 0, ymax = 0.5),
            fill = "gray95", alpha = 0.5) +
  geom_rect(aes(xmin = 0.5, xmax = 1, ymin = 0, ymax = 0.5),
            fill = "gray90", alpha = 0.5) +
  geom_hline(yintercept = 0.5, linetype = "dashed") +
  geom_vline(xintercept = 0.5, linetype = "dashed") +
  geom_point(size = 10, color = "black", fill = "white", shape = 21) +
  geom_text(aes(label = paste0(cluster, "\n(n=", n, ")")),
            size = 3, fontface = "bold") +
  scale_x_continuous(limits = c(0, 1), breaks = c(0.25, 0.75),
                     labels = c("Low", "High"), expand = c(0.05, 0.05)) +
  scale_y_continuous(limits = c(0, 1), breaks = c(0.25, 0.75),
                     labels = c("Low", "High"), expand = c(0.05, 0.05)) +
  labs(x = "American Cultural Orientation",
       y = "Heritage Cultural Orientation") +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text = element_text(size = 10, face = "bold"))

p_quadrant

ggsave(here("figures", "fig-quadrant.png"), p_quadrant,
       width = 7, height = 6, dpi = 600)
```

\newpage

# Phase 4: Hypothesis Evaluation

```{r tbl-hypothesis-summary}
#| label: tbl-hypothesis-summary
#| tbl-cap: "Hypothesis Evaluation Summary"

# H1: Distinct profiles
h1_kw_significant <- sum(kw_results$p_value < 0.05)
h1_pairwise_significant <- sum(pairwise_results$Significant == "Yes")
h1_result <- ifelse(h1_kw_significant >= 3 & h1_pairwise_significant >= 10,
                    "Supported", "Not Supported")

# H2a: Culture Affirming = most liberal/Dem/pro-immigrant
ca_most_liberal <- ideology_dist$cluster[which.max(ideology_dist$pct_liberal)] ==
  "Culture Affirming"
ca_most_dem <- partyid_dist$cluster[which.max(partyid_dist$pct_democrat)] ==
  "Culture Affirming"
ca_most_pro_imm <- immview_dist$cluster[which.max(immview_dist$pct_strengthen)] ==
  "Culture Affirming"
h2a_result <- ifelse(sum(ca_most_liberal, ca_most_dem, ca_most_pro_imm) >= 2,
                     "Supported", "Partially Supported")

# H2b: Assimilationist = most conservative/Rep/restrictive
assim_most_cons <- ideology_dist$cluster[which.max(ideology_dist$pct_conservative)] ==
  "Assimilationist"
assim_most_rep <- partyid_dist$cluster[which.max(partyid_dist$pct_republican)] ==
  "Assimilationist"
assim_least_pro_imm <- immview_dist$cluster[which.min(immview_dist$pct_strengthen)] ==
  "Assimilationist"
h2b_result <- ifelse(sum(assim_most_cons, assim_most_rep, assim_least_pro_imm) >= 2,
                     "Supported", "Partially Supported")

# H2c: Bicultural distinct from both poles
bicultural_sig <- pairwise_results %>%
  filter(Group1 == "Bicultural" | Group2 == "Bicultural") %>%
  filter(Significant == "Yes") %>%
  nrow()
h2c_result <- ifelse(bicultural_sig >= 5, "Supported", "Partially Supported")

# H2d: Demicultural = highest within-group variance
demicultural_highest_var <- sum(highest_var$Highest_Var_Cluster == "Demicultural")
demicultural_highest_cv <- as.character(cv_summary$cluster[1]) == "Demicultural"
h2d_result <- ifelse(demicultural_highest_var >= 3 | demicultural_highest_cv,
                     "Supported", "Not Supported")

hypothesis_summary <- tibble(
  Hypothesis = c(
    "H1: Distinct profiles across orientations",
    "H2a: Culture Affirming = most liberal/Dem/pro-immigrant",
    "H2b: Assimilationist = most conservative/Rep/restrictive",
    "H2c: Bicultural distinct from both poles",
    "H2d: Demicultural = highest within-group variance"
  ),
  Test = c(
    "Omnibus (K-W) + pairwise $\\delta$",
    "Profile comparison",
    "Profile comparison",
    "Key contrast + pairwise $\\delta$",
    "CV + Levene's test"
  ),
  Result = c(h1_result, h2a_result, h2b_result, h2c_result, h2d_result),
  Evidence = c(
    paste0(h1_kw_significant, "/5 K-W sig.; ",
           h1_pairwise_significant, "/30 pairwise sig."),
    paste0("Most liberal: ", ca_most_liberal, "; Most Dem: ", ca_most_dem,
           "; Most pro-imm: ", ca_most_pro_imm),
    paste0("Most cons: ", assim_most_cons, "; Most Rep: ", assim_most_rep,
           "; Least pro-imm: ", assim_least_pro_imm),
    paste0(bicultural_sig, " significant pairwise comparisons"),
    paste0(demicultural_highest_var, "/5 DVs highest var; Highest avg CV: ",
           as.character(cv_summary$cluster[1]))
  )
)

hypothesis_summary %>%
  kable(booktabs = TRUE,
        col.names = c("Hypothesis", "Test", "Result", "Evidence"),
        escape = FALSE,
        align = c("l", "l", "c", "l")) %>%
  kable_styling(latex_options = c("hold_position", "scale_down"),
                full_width = FALSE, font_size = 8) %>%
  column_spec(1, width = "2.5in") %>%
  column_spec(4, width = "2in")
```

\noindent \textit{Note:} Hypothesis support criteria: H1 requires $\geq$ 3/5 significant omnibus tests and $\geq$ 10/30 significant pairwise effects. H2a--H2c require $\geq$ 2/3 directional indicators. H2d requires $\geq$ 3/5 highest-variance indicators or highest average CV.

```{r hypothesis-narrative}
#| label: hypothesis-narrative

cat("=== HYPOTHESIS EVALUATION SUMMARY ===\n\n")

cat("H1: Distinct profiles across orientations\n")
cat("  Result:", h1_result, "\n")
cat("  - Kruskal-Wallis significant:", h1_kw_significant, "of 5 DVs\n")
cat("  - Pairwise comparisons significant:", h1_pairwise_significant, "of 30\n\n")

cat("H2a: Culture Affirming = most liberal/Dem/pro-immigrant\n")
cat("  Result:", h2a_result, "\n\n")

cat("H2b: Assimilationist = most conservative/Rep/restrictive\n")
cat("  Result:", h2b_result, "\n\n")

cat("H2c: Bicultural distinct from both poles\n")
cat("  Result:", h2c_result, "\n")
cat("  - Significant pairwise effects:", bicultural_sig, "\n\n")

cat("H2d: Demicultural = highest within-group variance\n")
cat("  Result:", h2d_result, "\n")
cat("  - DVs with highest variance:", demicultural_highest_var, "of 5\n")
cat("  - Most heterogeneous (avg CV):", as.character(cv_summary$cluster[1]), "\n")
```

\newpage

# Save Results

```{r save-results}
#| label: save-results

# Save all computed objects so Manuscript_Figures.qmd and Appendix.qmd
# can load them directly without re-running bootstrap
save(
  # Data
  df, n_total, cluster_ns, dvs,
  # Phase 1: distributions
  ideology_dist, partyid_dist, immview_dist, dreamact_dist, immpolicy_dist,
  summary_profiles,
  # Phase 2: between-orientation
  kw_results, pairwise_results, pairwise_results_full,
  cramers_results, domain_summary,
  cv_results, cv_wide, cv_summary,
  levene_results, variance_by_group,
  # Phase 3: dimension contrasts
  heritage_results, american_results, dimension_results, dimension_summary,
  key_contrast,
  # Phase 4: hypothesis evaluation
  hypothesis_summary,
  # Helper functions
  cliffs_delta, bootstrap_delta_ci, cramers_v, bootstrap_v_ci,
  # Parameters
  SEED, n_boot,
  file = here("data", "processed", "phase3_results.rda")
)

cat("Phase 3 results saved to data/processed/phase3_results.rda\n")
```

\newpage

# Session Info

```{r session-info}
#| label: session-info

sessionInfo()
```
